State of the Ocean report

Dependencies

library(dplyr)
library(ggplot2)
library(sf)
library(robis)
library(rnaturalearth)
library(stringr)
library(cartomisc)
library(arrow)
library(nngeo)
library(RColorBrewer)

Background spatial data

countries <- ne_countries(returnclass = "sf")
land <- landr::get_land_polygons(simplified = TRUE) %>%
  st_simplify(dTolerance = 30000) %>%
  filter(!st_is_empty(geometry))

Oceans and seas

if (!file.exists("data/goas.shp")) {
  download.file("http://geo.vliz.be/geoserver/MarineRegions/ows?service=WFS&version=1.0.0&request=GetFeature&typeName=MarineRegions:goas&maxFeatures=50&outputFormat=SHAPE-ZIP", "data/goas.zip")
  unzip("data/goas.zip", exdir = "data")
}

goas <- st_read("data/goas.shp", options = "ENCODING=UTF-8", quiet = TRUE)

sf_use_s2(FALSE)

goas_simplified <- goas %>%
  st_simplify(dTolerance = 0.1) %>%
  nngeo::st_remove_holes() %>%
  select(name) %>%
  st_set_crs(NA) %>%
  st_segmentize(1) %>%
  st_set_crs(4326)

Type localities dataset

localities <- occurrence(datasetid = "b74b429a-4052-4f5b-bff3-fe0b5a2e8669") %>%
  st_as_sf(coords = c("decimalLongitude", "decimalLatitude"), crs = 4326)

breaks <- c(1800, 1900, 1950, 2000, 2100)
labels <- head(paste0(breaks, " - ", breaks[2:length(breaks)]), -1)
labels <- str_replace(labels, "2100", "")
labels <- str_replace(labels, "1800", "")

localities <- localities %>%
  mutate(period = cut(date_year, breaks = breaks, labels = labels))

ggplot() +
  geom_sf(data = land, fill = NA, color = "#000000", size = 0.1) +
  geom_sf(data = localities %>% filter(!is.na(period)), size = 0.3, color = "#000000") +
  coord_sf(crs = "ESRI:54030") +
  theme_void() +
  facet_wrap(~period, ncol = 2)

ggsave("output/type_localities_map.png", height = 6, width = 10, dpi = 400, bg = "white")

Version with Oceans and Seas polygons:

ggplot() +
  geom_sf(data = goas_simplified, aes(fill = name), size = 0) +
  geom_sf(data = land, fill = "#ffffff", color = "#000000", size = 0.1) +
  geom_sf(data = localities %>% filter(!is.na(period)), size = 0.3, color = "#000000") +
  scale_fill_brewer(palette = "Paired") +
  coord_sf(crs = "ESRI:54030") +
  theme_void() +
  theme(legend.position = "none") +
  facet_wrap(~period, ncol = 2)

ggsave("output/type_localities_map_oceans.png", height = 6, width = 10, dpi = 400, bg = "white")

Join type localities with Oceans and Seas data shapefile:

localities$goas <- goas_simplified$name[st_nearest_feature(localities, goas_simplified, check_crs = TRUE)]

ggplot() +
  geom_sf(data = land, fill = NA, color = "#000000", size = 0.1) +
  geom_sf(data = localities %>% filter(!is.na(period)), aes(color = goas), size = 0.3) +
  coord_sf(crs = "ESRI:54030") +
  theme_void() +
  scale_color_brewer(palette = "Paired") +
  guides(color = guide_legend(title = "Oceans and Seas")) +
  facet_wrap(~period, ncol = 2)

ggsave("output/type_localities_map_color.png", height = 4, width = 9, dpi = 400, bg = "white")

Frequency table:

localities_cleaned <- localities %>%
  as.data.frame() %>%
  filter(!is.na(period))

stats <- round(table(localities_cleaned$goas, localities_cleaned$period) / nrow(localities_cleaned) * 100, digits = 2)
stats %>% knitr::kable()
- 1900 1900 - 1950 1950 - 2000 2000 -
Arctic Ocean 0.51 0.36 0.45 0.31
Baltic Sea 0.01 0.02 0.05 0.07
Indian Ocean 1.11 2.72 7.67 2.10
Mediterranean Region 1.14 0.26 2.24 1.16
North Atlantic Ocean 2.79 2.86 8.83 3.83
North Pacific Ocean 0.79 3.46 7.50 6.67
South Atlantic Ocean 0.54 1.09 2.51 3.59
South China and Easter Archipelagic Seas 1.10 0.76 1.24 1.67
South Pacific Ocean 1.46 2.06 17.03 6.74
Southern Ocean 0.22 1.20 1.06 0.83
stats <- localities_cleaned %>%
  group_by(period, goas) %>%
  summarize(n = n())

ggplot(stats) +
  geom_bar(aes(x = period, y = n, fill = goas), stat = "identity") +
  scale_fill_brewer(palette = "Paired") +
  guides(fill = guide_legend(title = "Oceans and Seas"))

ggsave("output/type_localities_barplot.png", height = 4, width = 7, dpi = 300, bg = "white", scale = 1.5)

ggplot(stats) +
  geom_bar(aes(x = period, y = n, fill = goas), stat = "identity", position = "dodge") +
  scale_fill_brewer(palette = "Paired") +
  guides(fill = guide_legend(title = "Oceans and Seas"))

ggsave("output/type_localities_barplot_dodge.png", height = 4, width = 7, dpi = 300, bg = "white", scale = 1.5)

Occurrence statistics

Load all OBIS occurrences from S3:

space <- S3FileSystem$create(
  anonymous = TRUE,
  scheme = "https",
  endpoint_override = "ams3.digitaloceanspaces.com"
)

occ <- open_dataset(space$path("obis-datasets/exports/obis_20220208.parquet")) %>%
  select(date_year, species, decimalLongitude, decimalLatitude) %>%
  as_tibble()

Join with Oceans and Seas:

if (!file.exists("data/occ_joined.dat")) {
  occ_coords <- occ %>%
    distinct(decimalLongitude, decimalLatitude) %>%
    st_as_sf(coords = c("decimalLongitude", "decimalLatitude"), crs = 4326, remove = FALSE)
  occ_joined <- st_join(occ_coords, goas_simplified, join = st_within)
  remove(occ_coords)
  save(occ_joined, file = "data/occ_joined.dat")
} else {
  load("data/occ_joined.dat")
}

occ <- occ %>%
  left_join(occ_joined %>% as.data.frame() %>% select(decimalLongitude, decimalLatitude, name), by = c("decimalLongitude", "decimalLatitude"))

remove(occ_joined)

Verify the results by sampling some points:

ggplot() +
  geom_sf(data = land, fill = NA, color = "#000000", size = 0.1) +
  geom_sf(data = occ %>% sample_n(10000) %>% st_as_sf(coords = c("decimalLongitude", "decimalLatitude"), crs = 4326), aes(color = name), size = 0.3) +
  coord_sf(crs = "ESRI:54030") +
  theme_void() +
  scale_color_brewer(palette = "Paired") +
  guides(color = guide_legend(title = "Oceans and Seas"))

Statistics:

stats <- occ %>%
  group_by(name) %>%
  summarize(records = n(), species = length(unique(species)))

stats
## # A tibble: 11 × 3
##    name                                      records species
##    <chr>                                       <int>   <int>
##  1 Arctic Ocean                              1764507    7123
##  2 Baltic Sea                                4191929    3487
##  3 Indian Ocean                              9436480   42423
##  4 Mediterranean Region                      1408772   11221
##  5 North Atlantic Ocean                     34488793   40844
##  6 North Pacific Ocean                      12854241   36149
##  7 South Atlantic Ocean                      2253432   19329
##  8 South China and Easter Archipelagic Seas   338102   15722
##  9 South Pacific Ocean                      23096673   50029
## 10 Southern Ocean                            2337205    8143
## 11 <NA>                                      7544341   52037

Statistics over time:

stats <- occ %>%
  filter(date_year >= 1900 & !is.na(name)) %>%
  group_by(name, date_year) %>%
  summarize(records = n(), species = length(unique(species))) %>%
  ungroup() %>%
  tidyr::complete(name, date_year)

ggplot(stats %>% filter(!is.na(name))) +
  geom_smooth(aes(x = date_year, y = records, color = name), fill = NA, method = "gam") +
  geom_point(aes(x = date_year, y = records, color = name)) +
  scale_color_brewer(palette = "Paired") +
  guides(color = guide_legend(title = "Oceans and Seas")) +
  scale_y_continuous(trans = "log10")

ggsave("output/records_time.png", height = 4, width = 7, dpi = 300, bg = "white", scale = 1.5)

ggplot(stats %>% filter(!is.na(name))) +
  geom_smooth(aes(x = date_year, y = species, color = name), fill = NA, method = "gam") +
  geom_point(aes(x = date_year, y = species, color = name)) +
  scale_color_brewer(palette = "Paired") +
  guides(color = guide_legend(title = "Oceans and Seas"))

ggsave("output/species_time.png", height = 4, width = 7, dpi = 300, bg = "white", scale = 1.5)

ggplot(stats %>% filter(!is.na(name))) +
  geom_smooth(aes(x = date_year, y = species, color = name), fill = NA, method = "gam") +
  geom_point(aes(x = date_year, y = species, color = name)) +
  scale_color_brewer(palette = "Paired") +
  guides(color = guide_legend(title = "Oceans and Seas")) +
  scale_y_continuous(trans = "log10")

ggsave("output/species_time_log.png", height = 4, width = 7, dpi = 300, bg = "white", scale = 1.5)

pal <- rev(brewer.pal(11, "RdBu"))

ggplot(stats %>% filter(!is.na(name))) +
  geom_tile(aes(x = date_year, y = 1, fill = species)) +
  scale_fill_gradientn(colors = pal, na.value = pal[1]) +
  facet_wrap(~name, ncol = 3) +
  theme_minimal() +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())

ggsave("output/species_stripes.png", height = 4, width = 7, dpi = 300, bg = "white", scale = 1.5)

Cumulative species:

stats_cumulative <- occ %>%
  filter(!is.na(name) & !is.na(species) & !is.na(date_year)) %>%
  group_by(name, species) %>%
  summarize(date_year = min(date_year)) %>%
  group_by(name, date_year) %>%
  summarize(species = n()) %>%
  arrange(name, date_year) %>%
  mutate(cumulative_species = cumsum(species))

ggplot(stats_cumulative) +
  geom_line(aes(x = date_year, y = cumulative_species, color = name), size = 1) +
  scale_color_brewer(palette = "Paired") +
  guides(color = guide_legend(title = "Oceans and Seas")) +
  xlim(c(1900, 2025))

ggsave("output/species_cumulative.png", height = 4, width = 7, dpi = 300, bg = "white", scale = 1.5)